Reading in the raw counts matrix and meta data, populating the infercnv object
infercnv_obj = CreateInfercnvObject(
raw_counts_matrix="oligodendroglioma_expression_downsampled.counts.matrix",
annotations_file="oligodendroglioma_annotations_downsampled.txt",
delim="\t",
gene_order_file="gencode_downsampled.txt",
ref_group_names=c("Microglia/Macrophage","Oligodendrocytes (non-malignant)"))
Removing those genes that are very lowly expressed or present in very few cells
# filter out low expressed genes
cutoff=1
infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff)
## INFO [2018-09-14 11:04:12] ::above_min_mean_expr_cutoff:Start
## INFO [2018-09-14 11:04:12] ::process_data:Averages (counts).
## INFO [2018-09-14 11:04:12] Removing 3025 genes from matrix as below mean expr threshold: 1
# filter out bad cells
min_cells_per_gene=3
infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene)
## INFO [2018-09-14 11:04:12] no genes removed due to min cells/gene filter
## for safe keeping
infercnv_orig_filtered = infercnv_obj
#plot_mean_chr_expr_lineplot(infercnv_obj)
save('infercnv_obj', file = 'infercnv_obj.orig_filtered')
infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj)
Suggested by Matan for removing noisy variation at low counts
infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj)
save('infercnv_obj', file='infercnv_obj.anscombe')
infercnv_obj <- log2xplus1(infercnv_obj)
## INFO [2018-09-14 11:04:13] transforming log2xplus1()
save('infercnv_obj', file='infercnv_obj.log_transformed')
threshold = mean(abs(get_average_bounds(infercnv_obj)))
infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold)
## INFO [2018-09-14 11:04:14] ::process_data:setting max centered expr, threshold set to: +/-: 4.33594984201374
plot_cnv(infercnv_obj,
output_filename='infercnv.logtransf',
x.range="auto",
title = "Before InferCNV (filtered & log2 transformed)",
color_safe_pal = FALSE,
x.center = mean(infercnv_obj@expr.data))
knitr::include_graphics("infercnv.logtransf.png")
infercnv_obj = smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE)
## INFO [2018-09-14 11:04:37] ::smooth_window:Start.
save('infercnv_obj', file='infercnv_obj.smooth_by_chr')
# re-center each cell
infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method = "median")
## INFO [2018-09-14 11:04:39] ::center_smooth across chromosomes per cell
save('infercnv_obj', file='infercnv_obj.cells_recentered')
plot_cnv(infercnv_obj,
output_filename='infercnv.chr_smoothed',
x.range="auto",
title = "chr smoothed and cells re-centered",
color_safe_pal = FALSE)
knitr::include_graphics("infercnv.chr_smoothed.png")
infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=TRUE)
## INFO [2018-09-14 11:05:03] ::subtract_ref_expr_from_obs:Start
## INFO [2018-09-14 11:05:03] inverting log2xplus1()
## INFO [2018-09-14 11:05:03] subtracting mean(normal) per gene per cell across all data
## INFO [2018-09-14 11:05:36] transforming log2xplus1()
save('infercnv_obj', file='infercnv_obj.ref_subtracted')
plot_cnv(infercnv_obj,
output_filename='infercnv.ref_subtracted',
x.range="auto",
title="ref subtracted",
color_safe_pal = FALSE)
knitr::include_graphics("infercnv.ref_subtracted.png")
Converting the log(FC) values to regular fold change values, centered at 1 (no fold change)
This is important because we want (1/2)x to be symmetrical to 1.5x, representing loss/gain of one chromosome region.
infercnv_obj <- invert_log2(infercnv_obj)
## INFO [2018-09-14 11:05:58] invert_log2(), computing 2^x
save('infercnv_obj', file='infercnv_obj.inverted_log')
plot_cnv(infercnv_obj,
output_filename='infercnv.inverted',
color_safe_pal = FALSE,
x.range="auto",
x.center=1,
title = "inverted log FC to FC")
knitr::include_graphics("infercnv.inverted.png")
infercnv_obj <- clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1.5)
## INFO [2018-09-14 11:06:21] :: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: 0.928098652851 - 1.07402608145631
save('infercnv_obj', file='infercnv_obj.denoised')
plot_cnv(infercnv_obj,
output_filename='infercnv.denoised',
x.range="auto",
x.center=1,
title="denoised",
color_safe_pal = FALSE)
knitr::include_graphics("infercnv.denoised.png")
This generally improves on the visualization
infercnv_obj = remove_outliers_norm(infercnv_obj)
## INFO [2018-09-14 11:06:53] ::remove_outlier_norm:Start out_method: average_bound lower_bound: NA upper_bound: NA
## INFO [2018-09-14 11:06:53] ::remove_outlier_norm using method: average_bound for defining outliers.
save('infercnv_obj', file="infercnv_obj.outliers_removed")
plot_cnv(infercnv_obj,
output_filename='infercnv.outliers_removed',
color_safe_pal = FALSE,
x.range="auto",
x.center=1,
title = "outliers removed")
knitr::include_graphics("infercnv.outliers_removed.png")
Runs a t-Test comparing tumor/normal for each patient and normal sample, and masks out those genes that are not significantly DE.
load('infercnv_obj.final')
plot_data = infercnv_obj@expr.data
high_threshold = max(abs(quantile(plot_data[plot_data != 0], c(0.05, 0.95))))
low_threshold = -1 * high_threshold
infercnv_obj2 <- infercnv:::mask_non_DE_genes_basic(infercnv_obj, test.use = 't', center_val=1)
## INFO [2018-09-14 11:07:18] Finding DE genes between malignant_MGH36 and Microglia/Macrophage
## INFO [2018-09-14 11:07:20] Found 3217 genes as DE
## INFO [2018-09-14 11:07:20] Finding DE genes between malignant_MGH36 and Oligodendrocytes (non-malignant)
## INFO [2018-09-14 11:07:21] Found 2898 genes as DE
## INFO [2018-09-14 11:07:21] Finding DE genes between malignant_MGH53 and Microglia/Macrophage
## INFO [2018-09-14 11:07:22] Found 2590 genes as DE
## INFO [2018-09-14 11:07:22] Finding DE genes between malignant_MGH53 and Oligodendrocytes (non-malignant)
## INFO [2018-09-14 11:07:23] Found 2179 genes as DE
## INFO [2018-09-14 11:07:23] Finding DE genes between malignant_93 and Microglia/Macrophage
## INFO [2018-09-14 11:07:25] Found 2584 genes as DE
## INFO [2018-09-14 11:07:25] Finding DE genes between malignant_93 and Oligodendrocytes (non-malignant)
## INFO [2018-09-14 11:07:26] Found 2002 genes as DE
## INFO [2018-09-14 11:07:26] Finding DE genes between malignant_97 and Microglia/Macrophage
## INFO [2018-09-14 11:07:27] Found 2565 genes as DE
## INFO [2018-09-14 11:07:27] Finding DE genes between malignant_97 and Oligodendrocytes (non-malignant)
## INFO [2018-09-14 11:07:28] Found 2400 genes as DE
plot_cnv(infercnv_obj2,
output_filename='infercnv.non-DE-genes-masked',
color_safe_pal = FALSE,
x.range=c(low_threshold, high_threshold),
x.center=1,
title = "non-DE-genes-masked")
knitr::include_graphics("infercnv.non-DE-genes-masked.png")